In [1]:
from scipy.interpolate import interp1d
In [2]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
In [3]:
def estimate_mean(data, weight):
return np.sum(data * weight) / np.sum(weight)
def estimate_std(data, weight, mean):
variance = np.sum(weight * (data - mean)**2) / np.sum(weight)
return np.sqrt(variance)
In [4]:
np.random.seed(110) # for reproducible random results
# set parameters
red_mean = 3
red_std = 0.8
blue_mean = 7
blue_std = 2
In [5]:
# draw 20 samples from normal distributions with red/blue parameters
red = np.random.normal(red_mean, red_std, size=20)
blue = np.random.normal(blue_mean, blue_std, size=20)
both_colours = np.sort(np.concatenate((red, blue))) # for later use...
In [6]:
plt.scatter(red, np.zeros(len(red)), c='red')
plt.scatter(blue, np.zeros(len(blue)), c='blue')
plt.show()
In [7]:
# estimates for the mean
red_mean_guess = 1.1
blue_mean_guess = 9
# estimates for the standard deviation
red_std_guess = 2
blue_std_guess = 1.7
In [8]:
likelihood_of_red = stats.norm(red_mean_guess, red_std_guess).pdf(both_colours)
likelihood_of_blue = stats.norm(blue_mean_guess, blue_std_guess).pdf(both_colours)
In [9]:
f1 = interp1d(both_colours, likelihood_of_red, kind='cubic')
f2 = interp1d(both_colours, likelihood_of_blue, kind='cubic')
xnew = np.linspace(min(both_colours), max(both_colours), num=41, endpoint=True)
plt.scatter(red, np.zeros(len(red)), c='red')
plt.scatter(blue, np.zeros(len(blue)), c='blue')
plt.plot(xnew, f1(xnew), c='#FFCCCC')
plt.plot(xnew, f2(xnew), c='#CCCCFF')
plt.show()
In [10]:
likelihood_total = likelihood_of_red + likelihood_of_blue
red_weight = likelihood_of_red / likelihood_total
blue_weight = likelihood_of_blue / likelihood_total
In [11]:
# new estimates for standard deviation
blue_std_guess = estimate_std(both_colours, blue_weight, blue_mean_guess)
red_std_guess = estimate_std(both_colours, red_weight, red_mean_guess)
# new estimates for mean
red_mean_guess = estimate_mean(both_colours, red_weight)
blue_mean_guess = estimate_mean(both_colours, blue_weight)
print blue_std_guess, blue_mean_guess
print red_std_guess, red_mean_guess
In [12]:
# estimates for the mean
red_mean_guess = 1.1
blue_mean_guess = 9
# estimates for the standard deviation
red_std_guess = 2
blue_std_guess = 1.7
for i in range(20):
likelihood_of_red = stats.norm(red_mean_guess, red_std_guess).pdf(both_colours)
likelihood_of_blue = stats.norm(blue_mean_guess, blue_std_guess).pdf(both_colours)
f1 = interp1d(both_colours, likelihood_of_red, kind='cubic')
f2 = interp1d(both_colours, likelihood_of_blue, kind='cubic')
xnew = np.linspace(min(both_colours), max(both_colours), num=41, endpoint=True)
plt.scatter(red, np.zeros(len(red)), c='red')
plt.scatter(blue, np.zeros(len(blue)), c='blue')
plt.plot(xnew, f1(xnew), c='#FFCCCC')
plt.plot(xnew, f2(xnew), c='#CCCCFF')
likelihood_total = likelihood_of_red + likelihood_of_blue
red_weight = likelihood_of_red / likelihood_total
blue_weight = likelihood_of_blue / likelihood_total
# new estimates for standard deviation
blue_std_guess = estimate_std(both_colours, blue_weight, blue_mean_guess)
red_std_guess = estimate_std(both_colours, red_weight, red_mean_guess)
# new estimates for mean
red_mean_guess = estimate_mean(both_colours, red_weight)
blue_mean_guess = estimate_mean(both_colours, blue_weight)
f1 = interp1d(both_colours, likelihood_of_red, kind='cubic')
f2 = interp1d(both_colours, likelihood_of_blue, kind='cubic')
xnew = np.linspace(min(both_colours), max(both_colours), num=41, endpoint=True)
plt.scatter(red, np.zeros(len(red)), c='red')
plt.scatter(blue, np.zeros(len(blue)), c='blue')
plt.plot(xnew, f1(xnew), c='#FF0000')
plt.plot(xnew, f2(xnew), c='#0000FF')
plt.show()